#library(gdata)
print(5)
setwd("/users/brucedesmarais/Dropbox/SenPE/Data/")
library(foreign)
sendat <- read.dta("senator_data.dta")

dat <- list()
 files <- c("form0304fnl.csv","form0506fnl.csv","form0708fnl.csv","form7980fnl.csv", "form8182fnl.csv","form8384fnl.csv", "form8586fnl.csv", "form8788fnl.csv", "form8990fnl.csv", "form9192fnl.csv", "form9394fnl.csv", "form9596fnl.csv", "form9798fnl.csv")


for(i in 1:length(files)){
	dat[[i]] <- read.csv(paste("./PEDat/",files[i],sep=""),stringsAsFactors=F, na.strings=c("NA",""))
}

# Names of senator columns in PE Data
scols <- paste("s",1:15,sep="")
# Congresses of dat
congs <- c(108,109,110,96+0:9)

# IDs for matching
id_master <- read.csv("id_master.csv",stringsAsFactors=F)


# Create the event Matrices

procName <- function(x){
	xvec <- character(length(x))
	for(i in 1:length(x)){
		chari <- x[i]
		comind <- which(strsplit(chari,split="")[[1]]==",")
		if(length(comind)==1){
		if(comind +1 < nchar(chari)){
			chari <- substr(chari,1,comind+2)
			}
			}
		xvec[i] <- chari
		}
		xvec
	}

mats <- list()

nattend <- NULL
ind <- 1
ifow <- c(1,4:13)
for(i in 1:length(dat)){
	dati <- dat[[i]]
	congi <- congs[i]
	mati <- NULL
	if(is.element(i,ifow)){
	idsi <- id_master[which(id_master[,1]==congi),5]
	idsi <- idsi[which(!is.na(idsi))]
	idsi <- procName(idsi)
	}
	if(!is.element(i,ifow)){
		idsi <- as.character(unique(c(as.matrix(dati[,scols]))))
		idsi <- idsi[which(!is.na(idsi))]
		}
	veci <- numeric(length(idsi))
	issue <- NULL
	for(j in 1:nrow(dati)){
		if(!is.na(dati$s1[j])){
			attend <- dati[j,scols]
			attend <- as.character(attend[which(!is.na(attend))])
			nattend <- c(nattend,length(attend))
			print(attend)
			attend <- procName(attend)
			vecij <- veci
			vecij[match(attend,idsi)] <- 1
			if(mean(vecij)!=0){
			mati <- rbind(mati,vecij)
			issue <- c(issue,dati$issue[j])
			}
			}
		}
		colnames(mati) <- idsi
		rownames(mati) <- issue
		mats[[ind]] <- mati
		ind <- ind+1
	}

NodeTopics <- list()
NodeDocs <- list()
# Create Weighted Networks
for(i in 4:13){
	mati <- mats[[i]]
	topsi <- list()
	docsi <- list()
	for(j in 1:ncol(mati)){
		indsj <- which(mati[,j]==1)
		topsj <- rownames(mati)[indsj]
		docsj <- paste(topsj,collapse=" ")
		topsi[[j]] <- topsj
		docsi[[j]] <- docsj
	}
	NodeTopics[[i-3]] <- topsi
	NodeDocs[[i-3]] <- docsi
	}
	names(NodeTopics) <- 96:105
	names(NodeDocs) <- 96:105

senData <- read.dta("/Users/brucedesmarais/Dropbox/SenPE/Data/fowlersenpenbinomdata.dta")	

# function for matching our labels to Fowler's labels
procFow <- function(x){
	xvec <- character(length(x))
	for(i in 1:length(x)){
		chari <- x[i]
		comind <- which(strsplit(chari,split="")[[1]]==",")
		if(length(comind)==1){
			chari <- substr(chari,1,comind-1)
			}
		xvec[i] <- chari
		}
		toupper(xvec)
	}

## Classification
# 1 - Above the 75th %tile 
# 2 - Above the 75th %tile now and last
# 0 - Neither
NodeClass <- list()
for(i in 5:13){
		mati <- mats[[i]]
		dati <- subset(senData,senData$congress==92+i)
		labsi <- procFow(dati$labels)
		namesi <- procFow(colnames(mati))
		classi <- numeric(length(namesi))
		for(j in 1:length(namesi)){
				namej <- namesi[j]
				indj <- match(namej,labsi)
				if(!is.na(indj)==1){
						centNow <- mean(dati$nc < dati$nc[indj]) >= .5
						classi[j] <- centNow
						if(!is.na(dati$ncl[indj])){
						centThen <- mean(dati$ncl < dati$ncl[indj]) >= .5
						classi[j] <- centNow+centNow*centThen
						}
					}
				} 
		NodeClass[[i-3]] <- classi
	}

library(RTextTools)	

library(tm)
nodeVec <- NULL
classes <- NULL
cong <- NULL
for(i in 2:10){
	tops <- NodeTopics[[i]]
	class <- NodeClass[[i]]
	for(j in 1:length(tops)){
		topsj <- tops[[j]]
		nodeVec <- c(nodeVec,topsj)
		classes <- c(classes,rep(class[j],length(topsj)))
		cong <- c(cong,rep(95+i,length(topsj)))
	}
}


iserr <- numeric(length(nodeVec))
for(i in 1:length(nodeVec)){
	iserr[i] <- class(try(tolower(nodeVec[i])))
	}
nodeVec <- nodeVec[-which(iserr=="try-error")]
classes <- classes[-which(iserr=="try-error")]
cong <- cong[-which(iserr=="try-error")]
nodeCorp <- Corpus(VectorSource(nodeVec))
nodeVec <- tm_map(nodeCorp,removeWords,stopwords("english"))
nodeVec <- as.character(nodeVec)

nodeDTM <- create_matrix(nodeVec,removeStopwords=T,ngramLength=2)
#nodeDTM <- DocumentTermMatrix(nodeCorp,control=list(stopwords=T))
#nodeDTM <- nodeDTM[-which(apply(nodeDTM,1,sum)==0),]
nodeDTM <- as.matrix(nodeDTM)


termSum <- numeric(ncol(nodeDTM))
for(i in 1:ncol(nodeDTM)){
	termSum[i] <- sum(nodeDTM[,i])
}

nodeDTM <- nodeDTM[,which(termSum>5)]

uc <- unique(cong)
tabList <- list()
for(i in 1:length(uc)){

termSum0 <- apply(nodeDTM[which(classes==0 & cong==uc[i]),],2,sum)
termSum1 <- apply(nodeDTM[which(classes==1 & cong==uc[i]),],2,sum)
termSum2 <- apply(nodeDTM[which(classes==2 & cong==uc[i]),],2,sum)

termTab <- cbind(colnames(nodeDTM)[order(-termSum0)[1:5]],colnames(nodeDTM)[order(-termSum1)[1:5]],colnames(nodeDTM)[order(-termSum2)[1:5]])

tabList[[i]] <- list(termTab,cbind(termSum0,termSum1,termSum2))

}

names(tabList) <- uc

# Matrix for Xtable
xtabMat <- NULL
for(i in 1:length(tabList)){
	tabi <- rbind(names(tabList)[i],tabList[[i]][[1]])
	xtabMat <- rbind(xtabMat,tabi)
}



fMat <- NULL
for(i in 2:length(tabList)){
	fMat <- rbind(fMat,cbind(tabList[[i]][[2]],tabList[[i-1]][[2]],as.numeric(names(tabList)[i])))
	
}

est1 <- glm.nb(fMat[,1]~fMat[,4:6]+as.factor(fMat[,7]),x=T)
est2 <- glm.nb(fMat[,2]~fMat[,4:6]+as.factor(fMat[,7]),x=T)
est3 <- glm.nb(fMat[,3]~fMat[,4:6]+as.factor(fMat[,7]),x=T)

twoCols1 <- NULL
for(r in 2:4){
	est <- coef(est1)[r]
	se <- summary(est1)$coef[r,2]
	if(abs(est/se) > 1.96) pchg <- round(100*(exp(est)-1),2)
	if(abs(est/se) <= 1.96) pchg <- "--"
	est <- round(est,3)
	se <- round(se,3)
	se <- paste("(",se,")",sep="")
	twoCols1 <- rbind(twoCols1,c(est,se,pchg))
}

twoCols2 <- NULL
for(r in 2:4){
	est <- coef(est2)[r]
	se <- summary(est2)$coef[r,2]
	if(abs(est/se) > 1.96) pchg <- round(100*(exp(est)-1),2)
	if(abs(est/se) <= 1.96) pchg <- "--"
	est <- round(est,3)
	se <- round(se,3)
	se <- paste("(",se,")",sep="")
	twoCols2 <- rbind(twoCols2,c(est,se,pchg))
}


twoCols3 <- NULL
for(r in 2:4){
	est <- coef(est3)[r]
	se <- summary(est3)$coef[r,2]
	if(abs(est/se) > 1.96) pchg <- round(100*(exp(est)-1),2)
	if(abs(est/se) <= 1.96) pchg <- "--"
	est <- round(est,3)
	se <- round(se,3)
	se <- paste("(",se,")",sep="")
	twoCols3 <- rbind(twoCols3,c(est,se,pchg))
}

rownames(twoCols1) <- rownames(twoCols2) <- rownames(twoCols3) <- c("Non Central (t-1)","New Central (t-1)","Repeat Central (t-1)")

### Graphic Depicting Dynamic Relationships ###
threeCent <- c("Not Central","New Central","Persistently Central")

for(i in 1:3){

xi <- est1$x[,i+1]
xoth <- est1$x[,-(i+1)]
betaoth <- coef(est1)[-(i+1)]
lpoth <- t(apply(xoth,2,median))%*%betaoth

xseq <- seq(min(xi),max(xi),length=100)
beta <- coef(est1)[i+1]
se <- summary(est1)$coef[i+1,2]
betasamp <- rnorm(10000,beta,se)

muMat <- NULL
for(j in 1:10000){
	muMat <- rbind(muMat,exp(betasamp[i]*xseq+lpoth))	
}
muEst <- apply(muMat,2,mean)
muLci <- apply(muMat,2,quantile,prob=0.025)
muUci <- apply(muMat,2,quantile,prob=0.975)
	
filei <- paste("/users/brucedesmarais/Dropbox/SenPE/Tex/bigram1_",i,".pdf",sep="")
pdf(filei,height=3.5*.75, width=4*.75, family="Times", pointsize=13.5)
par(las=1,mar=c(4,4,.5,.5),cex.lab=1.25)
plot(xseq,apply(efflist[[i]],2,mean),type="l",ylim=c(2,10),ylab="Mean Bills Passed",xlab="")
polygon(c(xseq,rev(xseq)),c(apply(efflist[[i]],2,quantile,prob=0.975),rev(apply(efflist[[i]],2,quantile,prob=0.025))),col="grey65",border="grey65")
lines(xseq,apply(efflist[[i]],2,mean),lwd=2.5)
rug(ncDat,ticksize=0.075,col=rgb(0,0,0,25,maxColorValue=255))
title(line=2.25,xlab="Press Event Centrality")
dev.off()
}





